* Before running this do-file, set the working directory to the project's root folder (all paths below are relative to it)

* Session setup: start from an empty dataset and disable output paging.
clear
set more off

*** Income validation for the model:
* For each decade, compare model-implied log income per capita with the data
* at the commuting-zone (cz) level. Paths use forward slashes, which Stata
* accepts on Windows, macOS and Unix alike.
* binscatter is a user-written command and must be installed beforehand.

**************************************************
** 1950:

* Model output; the columns arrive as v1 (cz) and v2 (model log income),
* presumably because the CSV has no header row.
import delimited using "model/results_model/log_income_pc_1950_forstata.csv", clear

rename v1 cz
rename v2 log_income_pc_1950_model
gen decade = 1950

* Attach observed income; keep(match) retains only cz-decade pairs found in both files.
merge 1:1 cz decade using "dsets/cz_data/income/median_income_1950.dta", keep(match) nogen
gen log_income_pc_1950_data = ln(income_pc)

* Attach the reduced-form dataset (source of population and log_population used below).
merge 1:1 cz decade using "dsets/cz_data/dataset_reduced_form_cpp.dta", keep(match) nogen

* Center both income series on their population-weighted means.
sum log_income_pc_1950_data [weight=population]
replace log_income_pc_1950_data = log_income_pc_1950_data - `r(mean)'
sum log_income_pc_1950_model [weight=population]
replace log_income_pc_1950_model = log_income_pc_1950_model - `r(mean)'

* NOTE(review): an earlier version also built a centered log population
* (log_pop_aux) at this point, but it was never used and was dropped by the
* keep below. The binscatter runs on the uncentered log_population, which is
* consistent with the non-zero constants recorded underneath. The dead
* computation has been removed; outputs are unchanged.

* Income (data and model) against log population; binned points are saved to disk.
binscatter log_income_pc_1950_data log_income_pc_1950_model log_population [weight=population], ///
	reportreg savedata("model/results_model/log_pop_income_model_data_1950") replace
* Recorded results (kept from the original script):
* data: .1562992; const: -2.110533
* model: .1257428; const: -1.697926

keep cz log_income_pc_1950_data log_income_pc_1950_model population

* Data against model: weighted scatter with hollow markers, then the weighted regression.
twoway scatter log_income_pc_1950_data log_income_pc_1950_model [weight=population], msymbol(Oh)
reg log_income_pc_1950_data log_income_pc_1950_model [weight=population]
* Recorded results (kept from the original script):
* 1.145333; const: 2.59e-10
* r2: 0.5055

* Export the three columns without a header row (export delimited appends .csv).
keep log_income_pc_1950_data log_income_pc_1950_model population
export delimited using "model/results_model/log_income_model_data_1950", novarnames replace

**************************************************
** 1970:
* Compare model-implied and observed log income per capita by commuting zone (cz).
* Paths use forward slashes, which Stata accepts on Windows, macOS and Unix alike.
* binscatter is a user-written command and must be installed beforehand.

* Model output; the columns arrive as v1 (cz) and v2 (model log income),
* presumably because the CSV has no header row.
import delimited using "model/results_model/log_income_pc_1970_forstata.csv", clear

rename v1 cz
rename v2 log_income_pc_1970_model
gen decade = 1970

* Attach observed income; keep(match) retains only cz-decade pairs found in both files.
merge 1:1 cz decade using "dsets/cz_data/income/income_pc_1970.dta", keep(match) nogen
gen log_income_pc_1970_data = ln(income_pc)

* Attach the reduced-form dataset (source of population and log_population used below).
merge 1:1 cz decade using "dsets/cz_data/dataset_reduced_form_cpp.dta", keep(match) nogen

* Center both income series on their population-weighted means.
sum log_income_pc_1970_data [weight=population]
replace log_income_pc_1970_data = log_income_pc_1970_data - `r(mean)'
sum log_income_pc_1970_model [weight=population]
replace log_income_pc_1970_model = log_income_pc_1970_model - `r(mean)'

* NOTE(review): an earlier version also built a centered log population
* (log_pop_aux) at this point, but it was never used and was dropped by the
* keep below. The binscatter runs on the uncentered log_population, which is
* consistent with the non-zero constants recorded underneath. The dead
* computation has been removed; outputs are unchanged.

* Income (data and model) against log population; binned points are saved to disk.
binscatter log_income_pc_1970_data log_income_pc_1970_model log_population [weight=population], ///
	reportreg savedata("model/results_model/log_pop_income_model_data_1970") replace
* Recorded results (kept from the original script):
* data: .1002017; const: -1.39479
* model: .085272; const: -1.186971

keep cz log_income_pc_1970_data log_income_pc_1970_model population

* Data against model: weighted scatter with hollow markers, then the weighted regression.
twoway scatter log_income_pc_1970_data log_income_pc_1970_model [weight=population], msymbol(Oh)
reg log_income_pc_1970_data log_income_pc_1970_model [weight=population]
* Recorded results (kept from the original script):
* .6568693; const: 2.00e-09
* r2: 0.5326

* Export the three columns without a header row (export delimited appends .csv).
keep log_income_pc_1970_data log_income_pc_1970_model population
export delimited using "model/results_model/log_income_model_data_1970", novarnames replace

**************************************************
** 1990:
* Compare model-implied and observed log income per capita by commuting zone (cz).
* Paths use forward slashes, which Stata accepts on Windows, macOS and Unix alike.
* binscatter is a user-written command and must be installed beforehand.

* Model output; the columns arrive as v1 (cz) and v2 (model log income),
* presumably because the CSV has no header row.
import delimited using "model/results_model/log_income_pc_1990_forstata.csv", clear

rename v1 cz
rename v2 log_income_pc_1990_model
gen decade = 1990

* Attach observed income from the file shared by 1990 and 2010; matching on
* decade selects the 1990 rows. keep(match) retains only pairs found in both files.
merge 1:1 cz decade using "dsets/cz_data/income/income_pc_1990_2010.dta", keep(match) nogen
gen log_income_pc_1990_data = ln(income_pc)

* Attach the reduced-form dataset (source of population and log_population used below).
merge 1:1 cz decade using "dsets/cz_data/dataset_reduced_form_cpp.dta", keep(match) nogen

* Center both income series on their population-weighted means.
sum log_income_pc_1990_data [weight=population]
replace log_income_pc_1990_data = log_income_pc_1990_data - `r(mean)'
sum log_income_pc_1990_model [weight=population]
replace log_income_pc_1990_model = log_income_pc_1990_model - `r(mean)'

* NOTE(review): an earlier version also built a centered log population
* (log_pop_aux) at this point, but it was never used and was dropped by the
* keep below. The binscatter runs on the uncentered log_population, which is
* consistent with the non-zero constants recorded underneath. The dead
* computation has been removed; outputs are unchanged.

* Income (data and model) against log population; binned points are saved to disk.
binscatter log_income_pc_1990_data log_income_pc_1990_model log_population [weight=population], ///
	reportreg savedata("model/results_model/log_pop_income_model_data_1990") replace
* Recorded results (kept from the original script):
* data: .1176658; const:   -1.659871
* model: .0954792; const: -1.346892

keep cz log_income_pc_1990_data log_income_pc_1990_model population

* Data against model: weighted scatter with hollow markers, then the weighted regression.
twoway scatter log_income_pc_1990_data log_income_pc_1990_model [weight=population], msymbol(Oh)
reg log_income_pc_1990_data log_income_pc_1990_model [weight=population]
* Recorded results (kept from the original script):
* .7450224; const: 8.58e-10
* r2: 0.5482

* Export the three columns without a header row (export delimited appends .csv).
keep log_income_pc_1990_data log_income_pc_1990_model population
export delimited using "model/results_model/log_income_model_data_1990", novarnames replace

**************************************************
** 2010:
* Compare model-implied and observed log income per capita by commuting zone (cz).
* Paths use forward slashes, which Stata accepts on Windows, macOS and Unix alike.
* binscatter is a user-written command and must be installed beforehand.

* Model output; the columns arrive as v1 (cz) and v2 (model log income),
* presumably because the CSV has no header row.
import delimited using "model/results_model/log_income_pc_2010_forstata.csv", clear

rename v1 cz
rename v2 log_income_pc_2010_model
gen decade = 2010

* Attach observed income from the file shared by 1990 and 2010; matching on
* decade selects the 2010 rows. keep(match) retains only pairs found in both files.
merge 1:1 cz decade using "dsets/cz_data/income/income_pc_1990_2010.dta", keep(match) nogen
gen log_income_pc_2010_data = ln(income_pc)

* Attach the reduced-form dataset (source of population and log_population used below).
merge 1:1 cz decade using "dsets/cz_data/dataset_reduced_form_cpp.dta", keep(match) nogen

* Center both income series on their population-weighted means.
sum log_income_pc_2010_data [weight=population]
replace log_income_pc_2010_data = log_income_pc_2010_data - `r(mean)'
sum log_income_pc_2010_model [weight=population]
replace log_income_pc_2010_model = log_income_pc_2010_model - `r(mean)'

* NOTE(review): an earlier version also built a centered log population
* (log_pop_aux) at this point, but it was never used and was dropped by the
* keep below. The binscatter runs on the uncentered log_population, which is
* consistent with the non-zero constants recorded underneath. The dead
* computation has been removed; outputs are unchanged.

* Income (data and model) against log population; binned points are saved to disk.
binscatter log_income_pc_2010_data log_income_pc_2010_model log_population [weight=population], ///
	reportreg savedata("model/results_model/log_pop_income_model_data_2010") replace
* Recorded results (kept from the original script):
* data: .0912406; const: -1.308275
* model: .0737152; const: -1.056983

keep cz log_income_pc_2010_data log_income_pc_2010_model population

* Data against model: weighted scatter with hollow markers, then the weighted regression.
twoway scatter log_income_pc_2010_data log_income_pc_2010_model [weight=population], msymbol(Oh)
reg log_income_pc_2010_data log_income_pc_2010_model [weight=population]
* Recorded results (kept from the original script):
* .668712; const:  1.17e-09
* r2: 0.4743

* Export the three columns without a header row (export delimited appends .csv).
keep log_income_pc_2010_data log_income_pc_2010_model population
export delimited using "model/results_model/log_income_model_data_2010", novarnames replace

*******************************************************************
*******************************************************************

** Income at the city-sector level:
* Compare model-implied and observed log income for each cz-by-sector cell.
* Paths use forward slashes, which Stata accepts on Windows, macOS and Unix alike.
* binscatter is a user-written command and must be installed beforehand.

**************************************************
** 1990:

clear
* NOTE(review): presumably raised to accommodate the i.cz and i.macroipc_id
* dummies in controls() below; some Stata versions or editions may ignore or
* cap this setting - TODO confirm it is still required.
set matsize 5000

* Wide model output: v1 is the cz id and each remaining column v2, v3, ... holds one sector.
import delimited using "model/results_model/income_py_bysector_1990_forstata.csv", clear
rename v1 cz

* Go long: one row per (cz, sector). The j index is the numeric column suffix,
* so it is shifted down by one to line up with macroipc_id in the data file
* (presumably 1-based - verify against the merge file).
reshape long v, i(cz) j(macroipc_id)
rename v income_py_model_1990
replace macroipc_id = macroipc_id - 1

gen log_income_model = ln(income_py_model_1990)

* Attach observed income by cz and sector; keep(match) retains only cells found in both files.
merge 1:1 macroipc_id cz using "dsets/cz_data/income/inc_pc_cz_macroipc_1990.dta", keep(match) nogen

gen log_income_data = ln(inc_pc_cz_macroipc)

* Data against model in 100 bins, weighted by tot_perwt_cz_macroipc and
* controlling for cz and sector dummies; binned points are saved to disk.
binscatter log_income_data log_income_model [weight = tot_perwt_cz_macroipc], controls(i.cz i.macroipc_id) n(100) ///
	reportreg savedata("model/results_model/log_income_city_sector_1990") replace 
* Recorded result (kept from the original script):
* .3315499; const: 9.419854

**************************************************
** 2010:
* Compare model-implied and observed log income for each cz-by-sector cell.
* Paths use forward slashes, which Stata accepts on Windows, macOS and Unix alike.
* binscatter is a user-written command and must be installed beforehand.

clear
* NOTE(review): presumably raised to accommodate the i.cz and i.macroipc_id
* dummies in controls() below; some Stata versions or editions may ignore or
* cap this setting - TODO confirm it is still required.
set matsize 5000

* Wide model output: v1 is the cz id and each remaining column v2, v3, ... holds one sector.
import delimited using "model/results_model/income_py_bysector_2010_forstata.csv", clear
rename v1 cz

* Go long: one row per (cz, sector). The j index is the numeric column suffix,
* so it is shifted down by one to line up with macroipc_id in the data file
* (presumably 1-based - verify against the merge file).
reshape long v, i(cz) j(macroipc_id)
rename v income_py_model_2010
replace macroipc_id = macroipc_id - 1

gen log_income_model = ln(income_py_model_2010)

* Attach observed income by cz and sector; keep(match) retains only cells found in both files.
merge 1:1 macroipc_id cz using "dsets/cz_data/income/inc_pc_cz_macroipc_2010.dta", keep(match) nogen

gen log_income_data = ln(inc_pc_cz_macroipc)

* Data against model in 100 bins, weighted by tot_perwt_cz_macroipc and
* controlling for cz and sector dummies; binned points are saved to disk.
binscatter log_income_data log_income_model [weight = tot_perwt_cz_macroipc], controls(i.cz i.macroipc_id) n(100) ///
	reportreg savedata("model/results_model/log_income_city_sector_2010") replace 
* Recorded result (kept from the original script):
* .4026786; const: 9.732466

